import geopandas
import libpysalIntroduction to Area Unit Data
Spring 2023
Areal Unit Data
south = libpysal.examples.load_example('South')libpysal.examples.explain('South')south_gdf = geopandas.read_file(south.get_path('south.shp'))south_gdf.plot()<Axes: >

Geometry of the Data Set
south_gdf.crs<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
ax = south_gdf.plot()
ax.set_axis_off();
south_gdf.shape(1412, 70)
south_gdf.geometry0 POLYGON ((-80.62805 40.39816, -80.60204 40.480...
1 POLYGON ((-80.52625 40.16245, -80.58760 40.175...
2 POLYGON ((-80.52517 40.02275, -80.73843 40.035...
3 POLYGON ((-80.52447 39.72113, -80.83248 39.718...
4 POLYGON ((-75.77270 39.38301, -75.79144 39.723...
...
1407 POLYGON ((-79.14433 36.54606, -79.21706 36.549...
1408 POLYGON ((-79.43775 37.61596, -79.45834 37.603...
1409 POLYGON ((-80.12475 37.12510, -80.14045 37.128...
1410 POLYGON ((-76.39569 37.10771, -76.40270 37.090...
1411 POLYGON ((-77.53178 38.56506, -77.72094 38.840...
Name: geometry, Length: 1412, dtype: geometry
south_gdf.geometry.values[0]geom0 = south_gdf.geometry.values[0]dir(geom0)['__and__',
'__bool__',
'__class__',
'__delattr__',
'__dir__',
'__doc__',
'__eq__',
'__format__',
'__ge__',
'__geo_interface__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__nonzero__',
'__or__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__setstate__',
'__sizeof__',
'__slots__',
'__str__',
'__sub__',
'__subclasshook__',
'__xor__',
'_geom',
'_geom_prepared',
'_ndim',
'_repr_svg_',
'almost_equals',
'area',
'boundary',
'bounds',
'buffer',
'centroid',
'contains',
'contains_properly',
'convex_hull',
'coords',
'covered_by',
'covers',
'crosses',
'difference',
'disjoint',
'distance',
'dwithin',
'envelope',
'equals',
'equals_exact',
'exterior',
'from_bounds',
'geom_type',
'geometryType',
'has_z',
'hausdorff_distance',
'interiors',
'interpolate',
'intersection',
'intersects',
'is_closed',
'is_empty',
'is_ring',
'is_simple',
'is_valid',
'length',
'line_interpolate_point',
'line_locate_point',
'minimum_clearance',
'minimum_rotated_rectangle',
'normalize',
'oriented_envelope',
'overlaps',
'point_on_surface',
'project',
'relate',
'relate_pattern',
'representative_point',
'reverse',
'segmentize',
'simplify',
'svg',
'symmetric_difference',
'touches',
'type',
'union',
'within',
'wkb',
'wkb_hex',
'wkt',
'xy']
geom0.bounds(-80.6688232421875, 40.39815902709961, -80.52220916748047, 40.63713836669922)
geom0.exterior.coords.xy(array('d', [-80.6280517578125, -80.60203552246094, -80.62545776367188, -80.6336441040039, -80.6688232421875, -80.66793060302734, -80.63754272460938, -80.61175537109375, -80.57462310791016, -80.52220916748047, -80.52456665039062, -80.52377319335938, -80.6280517578125]),
array('d', [40.39815902709961, 40.480472564697266, 40.504398345947266, 40.53913879394531, 40.568214416503906, 40.58207321166992, 40.61391830444336, 40.619998931884766, 40.615909576416016, 40.63713836669922, 40.47871780395508, 40.4029655456543, 40.39815902709961]))
geom0.wkt'POLYGON ((-80.6280517578125 40.39815902709961, -80.60203552246094 40.480472564697266, -80.62545776367188 40.504398345947266, -80.6336441040039 40.53913879394531, -80.6688232421875 40.568214416503906, -80.66793060302734 40.58207321166992, -80.63754272460938 40.61391830444336, -80.61175537109375 40.619998931884766, -80.57462310791016 40.615909576416016, -80.52220916748047 40.63713836669922, -80.52456665039062 40.47871780395508, -80.52377319335938 40.4029655456543, -80.6280517578125 40.39815902709961))'
rp0 = geom0.representative_point()rp0rp0.xy(array('d', [-80.57673846928705]), array('d', [40.52176856994629]))
geom0.contains(rp0)True
rp0.within(geom0)True
Attributes
south_gdf.columnsIndex(['NAME', 'STATE_NAME', 'STATE_FIPS', 'CNTY_FIPS', 'FIPS', 'STFIPS',
'COFIPS', 'FIPSNO', 'SOUTH', 'HR60', 'HR70', 'HR80', 'HR90', 'HC60',
'HC70', 'HC80', 'HC90', 'PO60', 'PO70', 'PO80', 'PO90', 'RD60', 'RD70',
'RD80', 'RD90', 'PS60', 'PS70', 'PS80', 'PS90', 'UE60', 'UE70', 'UE80',
'UE90', 'DV60', 'DV70', 'DV80', 'DV90', 'MA60', 'MA70', 'MA80', 'MA90',
'POL60', 'POL70', 'POL80', 'POL90', 'DNL60', 'DNL70', 'DNL80', 'DNL90',
'MFIL59', 'MFIL69', 'MFIL79', 'MFIL89', 'FP59', 'FP69', 'FP79', 'FP89',
'BLK60', 'BLK70', 'BLK80', 'BLK90', 'GI59', 'GI69', 'GI79', 'GI89',
'FH60', 'FH70', 'FH80', 'FH90', 'geometry'],
dtype='object')
south_gdf.explore(column='HR60')Make this Notebook Trusted to load map: File -> Trust Notebook
south_gdf.explore(column='HR60', tooltip=['HR60', 'STATE_NAME', 'NAME', 'HR90'])Make this Notebook Trusted to load map: File -> Trust Notebook
south_gdf.HR60.describe()count 1412.000000
mean 7.292144
std 6.421018
min 0.000000
25% 3.213471
50% 6.245125
75% 9.956272
max 92.936803
Name: HR60, dtype: float64
ax = south_gdf.plot(column='HR60')
ax.set_axis_off();
How many states are there in this dataset
south_gdf.STATE_NAME.unique().shape(17,)
How many counties?
south_gdf.shape[0]1412
How many counties in each state?
south_gdf[['STATE_NAME', 'FIPS']].groupby(by='STATE_NAME').count()| FIPS | |
|---|---|
| STATE_NAME | |
| Alabama | 67 |
| Arkansas | 75 |
| Delaware | 3 |
| District of Columbia | 1 |
| Florida | 67 |
| Georgia | 159 |
| Kentucky | 120 |
| Louisiana | 64 |
| Maryland | 24 |
| Mississippi | 82 |
| North Carolina | 100 |
| Oklahoma | 77 |
| South Carolina | 46 |
| Tennessee | 95 |
| Texas | 254 |
| Virginia | 123 |
| West Virginia | 55 |
Which state had the highest median county homicide rate in 1960?
south_gdf[['STATE_NAME', 'HR60']].groupby(by='STATE_NAME').median()| HR60 | |
|---|---|
| STATE_NAME | |
| Alabama | 9.623977 |
| Arkansas | 4.704111 |
| Delaware | 4.228385 |
| District of Columbia | 10.471807 |
| Florida | 9.970306 |
| Georgia | 9.300076 |
| Kentucky | 5.235436 |
| Louisiana | 6.840286 |
| Maryland | 5.335208 |
| Mississippi | 8.919274 |
| North Carolina | 7.633043 |
| Oklahoma | 4.269126 |
| South Carolina | 7.509437 |
| Tennessee | 4.877751 |
| Texas | 4.326215 |
| Virginia | 6.672004 |
| West Virginia | 2.623226 |
Which county had the highest maximum county homicide rate in 1960?
south_gdf[['STATE_NAME', 'HR60']].groupby(by='STATE_NAME').max()| HR60 | |
|---|---|
| STATE_NAME | |
| Alabama | 24.903499 |
| Arkansas | 21.154427 |
| Delaware | 7.286472 |
| District of Columbia | 10.471807 |
| Florida | 40.744262 |
| Georgia | 53.304904 |
| Kentucky | 37.250885 |
| Louisiana | 18.243736 |
| Maryland | 14.327234 |
| Mississippi | 24.833923 |
| North Carolina | 25.660127 |
| Oklahoma | 17.088175 |
| South Carolina | 23.345940 |
| Tennessee | 20.894275 |
| Texas | 92.936803 |
| Virginia | 23.575639 |
| West Virginia | 11.482375 |
Intra-state dispersion
south_gdf[['STATE_NAME', 'HR60']].groupby(by='STATE_NAME').std()| HR60 | |
|---|---|
| STATE_NAME | |
| Alabama | 4.742337 |
| Arkansas | 4.574625 |
| Delaware | 1.815562 |
| District of Columbia | NaN |
| Florida | 7.990692 |
| Georgia | 7.906488 |
| Kentucky | 6.354316 |
| Louisiana | 4.189146 |
| Maryland | 4.064360 |
| Mississippi | 4.972698 |
| North Carolina | 4.596952 |
| Oklahoma | 4.231132 |
| South Carolina | 4.018644 |
| Tennessee | 4.354979 |
| Texas | 8.223844 |
| Virginia | 4.826707 |
| West Virginia | 2.773659 |
sgdf = south_gdf[['STATE_NAME', 'HR60']].groupby(by='STATE_NAME').std()cv = sgdf / south_gdf[['STATE_NAME', 'HR60']].groupby(by='STATE_NAME').mean() * 100cv.sort_values(by='HR60', ascending=False)| HR60 | |
|---|---|
| STATE_NAME | |
| Texas | 144.992919 |
| Kentucky | 96.815524 |
| West Virginia | 93.234007 |
| Arkansas | 81.223752 |
| Oklahoma | 81.114430 |
| Tennessee | 75.426226 |
| Georgia | 73.774440 |
| Maryland | 71.898559 |
| Florida | 68.252692 |
| Virginia | 66.924041 |
| Louisiana | 59.994571 |
| Mississippi | 57.457024 |
| North Carolina | 57.013871 |
| Alabama | 49.070812 |
| South Carolina | 48.083524 |
| Delaware | 34.966796 |
| District of Columbia | NaN |
Centroids and Projections
cents = south_gdf.geometry.centroid/var/folders/_n/1q8cd8t93vd7g0sp8v27v25h0000gn/T/ipykernel_9063/735428549.py:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
cents = south_gdf.geometry.centroid
south_gdf.estimate_utm_crs()<Projected CRS: EPSG:32615>
Name: WGS 84 / UTM zone 15N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 96°W and 90°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Ontario. Ecuador -Galapagos. Guatemala. Mexico. United States (USA).
- bounds: (-96.0, 0.0, -90.0, 84.0)
Coordinate Operation:
- name: UTM zone 15N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
geometry_utm = south_gdf.geometry.to_crs(south_gdf.estimate_utm_crs())geometry_utm0 POLYGON ((1551180.383 4546131.160, 1552088.484...
1 POLYGON ((1563606.155 4521109.514, 1558163.321...
2 POLYGON ((1565916.726 4505557.619, 1547448.547...
3 POLYGON ((1570744.723 4471954.424, 1544275.138...
4 POLYGON ((1987822.322 4503609.764, 1978677.613...
...
1407 POLYGON ((1743591.835 4135077.022, 1736966.984...
1408 POLYGON ((1699722.097 4250803.339, 1698098.723...
1409 POLYGON ((1646297.679 4187243.370, 1644842.996...
1410 POLYGON ((1980788.647 4238384.975, 1980504.623...
1411 POLYGON ((1851127.891 4383780.417, 1829264.981...
Name: geometry, Length: 1412, dtype: geometry
geometry_utm.plot()<Axes: >

south_gdf.plot()<Axes: >

south_gdf['geometry_utm'] = geometry_utmsouth_gdf.plot()<Axes: >

south_gdf.set_geometry('geometry_utm', inplace=True)south_gdf.plot()<Axes: >

south_gdf.crs<Projected CRS: EPSG:32615>
Name: WGS 84 / UTM zone 15N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 96°W and 90°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Ontario. Ecuador -Galapagos. Guatemala. Mexico. United States (USA).
- bounds: (-96.0, 0.0, -90.0, 84.0)
Coordinate Operation:
- name: UTM zone 15N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
cents = south_gdf.geometry.centroidcents.plot()<Axes: >

cents.plot(markersize=2)<Axes: >

base = south_gdf.plot()
cents.plot(ax=base, color='yellow', markersize=1)<Axes: >

south_gdf.set_geometry('geometry', inplace=True)base = south_gdf.plot()
cents.plot(ax=base, color='yellow', markersize=1)<Axes: >

south_gdf.crs<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
cents.crs<Projected CRS: EPSG:32615>
Name: WGS 84 / UTM zone 15N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 96°W and 90°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Ontario. Ecuador -Galapagos. Guatemala. Mexico. United States (USA).
- bounds: (-96.0, 0.0, -90.0, 84.0)
Coordinate Operation:
- name: UTM zone 15N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich